#!/usr/bin/python
# -*- coding: UTF-8 -*-
# author : bird.zhang@ximalaya.com

import pandas as pd
from pandas import Timestamp
import matplotlib.pyplot as plt
import matplotlib

# Chinese text support: use the SimHei font for sans-serif text, and turn off
# the Unicode minus sign for axis labels.
matplotlib.rcParams.update({
    'font.sans-serif': [u'SimHei'],
    'axes.unicode_minus': False,
})

# Input CSV: one row per weekly-report lookup, with 'userId' and 'ts' columns
# ('ts' is an epoch timestamp in milliseconds).
CSV_PATH = 'zhoubao_lookup_0422_0426.csv'
# Time zone the timestamps are converted to.
TIMEZONE = 'Asia/Shanghai'
# Inclusive bounds of the time window that is analysed.
WINDOW_START = '2018-04-23 00:00:00+0800'
WINDOW_END = '2018-04-27 00:00:00+0800'


def first_lookup_times(records, tz=TIMEZONE):
    """Return each user's first lookup time as a date-indexed frame.

    Args:
        records: DataFrame with a 'userId' column and a 'ts' column holding
            epoch timestamps in milliseconds.
        tz: Time zone the timestamps are converted to.

    Returns:
        A DataFrame sorted by its tz-aware 'date' index with one row per
        user; the single column 'cnt' holds the user id.
    """
    # Vectorised conversion instead of building one Timestamp per row.
    dates = pd.to_datetime(records['ts'], unit='ms', utc=True).dt.tz_convert(tz)
    # assign() returns a copy, so the caller's frame is left untouched.
    first_seen = records.assign(date=dates).groupby('userId')['date'].min()
    # A user whose timestamps are all missing has no first lookup time.
    first_seen = first_seen.dropna()
    # Swap the roles of the userId index and the date column.
    frame = first_seen.reset_index().set_index('date')
    frame.columns = ['cnt']
    # groupby orders rows by userId; label slicing on the date index needs
    # the index itself to be sorted.
    return frame.sort_index()


def hourly_lookup_counts(first_lookups, start=WINDOW_START, end=WINDOW_END):
    """Count first lookups per hour inside the inclusive window [start, end].

    Args:
        first_lookups: Frame as returned by ``first_lookup_times``.
        start: Lower bound of the window (date string or Timestamp).
        end: Upper bound of the window (date string or Timestamp).

    Returns:
        A DataFrame indexed by hour whose 'cnt' column is the number of
        users whose first lookup fell into that hour.
    """
    # Sorting again is cheap and keeps this helper safe for any caller.
    window = first_lookups.sort_index().loc[start:end]
    # Use the offset object rather than the "H" alias, which recent pandas
    # versions deprecate.
    return window.resample(pd.offsets.Hour()).count()


def plot_hourly_counts(hourly):
    """Draw the hourly first-lookup counts as a line chart and show it."""
    plt.plot(hourly)
    # Title
    plt.title(u'主播周报查阅分布图')
    # x axis
    plt.xlabel(u'日期')
    # y axis
    plt.ylabel(u'人次')
    plt.show()


def main():
    """Load the lookup log, print summary figures and plot the distribution."""
    records = pd.read_csv(CSV_PATH)
    first_lookups = first_lookup_times(records)
    first_lookups.info()
    hourly = hourly_lookup_counts(first_lookups)
    # Total number of users who first looked at the report inside the window.
    print(hourly['cnt'].sum())
    plot_hourly_counts(hourly)


if __name__ == '__main__':
    main()
